<!DOCTYPE HTML>
<html lang="zh-Hans">
    <!-- Start book Python数据分析课程讲义 -->
    <head>
        <!-- head:start -->
        <meta charset="UTF-8">
        <meta http-equiv="X-UA-Compatible" content="IE=edge" />
        <title>实战案例：全球食品数据分析 | Python数据分析课程讲义</title>
        <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
        <meta name="description" content="">
        <meta name="generator" content="GitBook 2.6.7">
        <meta name="author" content="BigCat">
        
        <meta name="HandheldFriendly" content="true"/>
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="apple-mobile-web-app-capable" content="yes">
        <meta name="apple-mobile-web-app-status-bar-style" content="black">
        <link rel="apple-touch-icon-precomposed" sizes="152x152" href="../../gitbook/images/apple-touch-icon-precomposed-152.png">
        <link rel="shortcut icon" href="../../gitbook/images/favicon.ico" type="image/x-icon">
        
    <link rel="stylesheet" href="../../gitbook/style.css">
    
        
        <link rel="stylesheet" href="../../gitbook/plugins/gitbook-plugin-tbfed-pagefooter/footer.css">
        
    
        
        <link rel="stylesheet" href="../../gitbook/plugins/gitbook-plugin-splitter/splitter.css">
        
    
        
        <link rel="stylesheet" href="../../gitbook/plugins/gitbook-plugin-toggle-chapters/toggle.css">
        
    
        
        <link rel="stylesheet" href="../../gitbook/plugins/gitbook-plugin-highlight/website.css">
        
    
        
        <link rel="stylesheet" href="../../gitbook/plugins/gitbook-plugin-fontsettings/website.css">
        
    
    

        
    
    
    <link rel="next" href="../../file/part04/4.html" />
    
    
    <link rel="prev" href="../../file/part03/3.9.html" />
    

        <!-- head:end -->
    </head>
    <body>
        <!-- body:start -->
        
    <div class="book"
        data-level="3.10"
        data-chapter-title="实战案例：全球食品数据分析"
        data-filepath="file/part03/3.10.md"
        data-basepath="../.."
        data-revision="Thu Apr 27 2017 00:50:19 GMT+0800 (CST)"
        data-innerlanguage="">
    

<div class="book-summary">
    <nav role="navigation">
        <ul class="summary">
            
            
            
            

            

            
    
        <li class="chapter " data-level="0" data-path="index.html">
            
                
                    <a href="../../index.html">
                
                        <i class="fa fa-check"></i>
                        
                        传智播客Python学院数据分析
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="1" data-path="file/part01/1.html">
            
                
                    <a href="../../file/part01/1.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>1.</b>
                        
                        一、工作环境准备及数据分析建模理论基础
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="1.1" data-path="file/part01/1.1.html">
            
                
                    <a href="../../file/part01/1.1.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>1.1.</b>
                        
                        Python 3.x新特性和编码回顾
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="1.2" data-path="file/part01/1.2.html">
            
                
                    <a href="../../file/part01/1.2.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>1.2.</b>
                        
                        DIKW模型与数据工程
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="1.3" data-path="file/part01/1.3.html">
            
                
                    <a href="../../file/part01/1.3.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>1.3.</b>
                        
                        数据分析建模理论基础
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="2" data-path="file/part02/2.html">
            
                
                    <a href="../../file/part02/2.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.</b>
                        
                        二、科学计算工具NumPy
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="2.1" data-path="file/part02/2.1.html">
            
                
                    <a href="../../file/part02/2.1.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.1.</b>
                        
                        ndarray的创建与数据类型
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="2.2" data-path="file/part02/2.2.html">
            
                
                    <a href="../../file/part02/2.2.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.2.</b>
                        
                        ndarray的矩阵处理
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="2.3" data-path="file/part02/2.3.html">
            
                
                    <a href="../../file/part02/2.3.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.3.</b>
                        
                        ndarray的元素处理
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="2.4" data-path="file/part02/2.4.html">
            
                
                    <a href="../../file/part02/2.4.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.4.</b>
                        
                        实战案例：2016美国总统大选民意调查统计
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="3" data-path="file/part03/3.html">
            
                
                    <a href="../../file/part03/3.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.</b>
                        
                        三、数据分析工具Pandas
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="3.1" data-path="file/part03/3.1.html">
            
                
                    <a href="../../file/part03/3.1.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.1.</b>
                        
                        Pandas的数据结构
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.2" data-path="file/part03/3.2.html">
            
                
                    <a href="../../file/part03/3.2.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.2.</b>
                        
                        Pandas的索引操作
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.3" data-path="file/part03/3.3.html">
            
                
                    <a href="../../file/part03/3.3.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.3.</b>
                        
                        Pandas的对齐运算
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.4" data-path="file/part03/3.4.html">
            
                
                    <a href="../../file/part03/3.4.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.4.</b>
                        
                        Pandas的函数应用
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.5" data-path="file/part03/3.5.html">
            
                
                    <a href="../../file/part03/3.5.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.5.</b>
                        
                        Pandas的层级索引
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.6" data-path="file/part03/3.6.html">
            
                
                    <a href="../../file/part03/3.6.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.6.</b>
                        
                        Pandas统计计算和描述
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.7" data-path="file/part03/3.7.html">
            
                
                    <a href="../../file/part03/3.7.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.7.</b>
                        
                        Pandas分组与聚合
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.8" data-path="file/part03/3.8.html">
            
                
                    <a href="../../file/part03/3.8.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.8.</b>
                        
                        数据清洗、合并、转化和重构
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.9" data-path="file/part03/3.9.html">
            
                
                    <a href="../../file/part03/3.9.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.9.</b>
                        
                        聚类模型 -- K-Means介绍
                    </a>
            
            
        </li>
    
        <li class="chapter active" data-level="3.10" data-path="file/part03/3.10.html">
            
                
                    <a href="../../file/part03/3.10.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.10.</b>
                        
                        实战案例：全球食品数据分析
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="4" data-path="file/part04/4.html">
            
                
                    <a href="../../file/part04/4.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>4.</b>
                        
                        四、数据可视化工具
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="4.1" data-path="file/part04/4.1.html">
            
                
                    <a href="../../file/part04/4.1.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>4.1.</b>
                        
                        Matplotlib绘图
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="4.2" data-path="file/part04/4.2.html">
            
                
                    <a href="../../file/part04/4.2.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>4.2.</b>
                        
                        Seaborn绘图
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="4.3" data-path="file/part04/4.3.html">
            
                
                    <a href="../../file/part04/4.3.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>4.3.</b>
                        
                        Bokeh绘图
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="4.4" data-path="file/part04/4.4.html">
            
                
                    <a href="../../file/part04/4.4.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>4.4.</b>
                        
                        实战案例：世界高峰数据可视化
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="5" data-path="file/part06/6.html">
            
                
                    <a href="../../file/part06/6.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>5.</b>
                        
                        五、自然语言处理NLTK
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="5.1" data-path="file/part06/6.1.html">
            
                
                    <a href="../../file/part06/6.1.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>5.1.</b>
                        
                        NLTK与自然语言处理基础
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="5.2" data-path="file/part06/6.2.html">
            
                
                    <a href="../../file/part06/6.2.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>5.2.</b>
                        
                        jieba分词
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="5.3" data-path="file/part06/6.3.html">
            
                
                    <a href="../../file/part06/6.3.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>5.3.</b>
                        
                        情感分析
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="5.4" data-path="file/part06/6.4.html">
            
                
                    <a href="../../file/part06/6.4.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>5.4.</b>
                        
                        文本相似度和分类
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="5.5" data-path="file/part06/6.6.html">
            
                
                    <a href="../../file/part06/6.6.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>5.5.</b>
                        
                        实战案例：微博情感分析
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    


            
            <li class="divider"></li>
            <li>
                <a href="https://www.gitbook.com" target="_blank" rel="noopener noreferrer" class="gitbook-link">
                    Published with GitBook
                </a>
            </li>
            
        </ul>
    </nav>
</div>

    <div class="book-body">
        <div class="body-inner">
            <div class="book-header" role="navigation">
    <!-- Actions Left -->
    

    <!-- Title -->
    <h1>
        <i class="fa fa-circle-o-notch fa-spin"></i>
        <a href="../../" >Python数据分析课程讲义</a>
    </h1>
</div>

            <div class="page-wrapper" tabindex="-1" role="main">
                <div class="page-inner">
                
                
                    <section class="normal" id="section-">
                    
                        <h2 id="&#x5168;&#x7403;&#x98DF;&#x54C1;&#x6570;&#x636E;&#x5206;&#x6790;">&#x5168;&#x7403;&#x98DF;&#x54C1;&#x6570;&#x636E;&#x5206;&#x6790;</h2>
<blockquote>
<p>&#x9879;&#x76EE;&#x53C2;&#x8003;&#xFF1A;<a href="https://www.kaggle.com/bhouwens/d/openfoodfacts/world-food-facts/how-much-sugar-do-we-eat/discussion" target="_blank" rel="noopener noreferrer">https://www.kaggle.com/bhouwens/d/openfoodfacts/world-food-facts/how-much-sugar-do-we-eat/discussion</a></p>
</blockquote>
<pre><code class="lang-python"><span class="hljs-comment"># -*- coding: utf-8 -*-</span>

<span class="hljs-comment"># &#x5904;&#x7406;zip&#x538B;&#x7F29;&#x6587;&#x4EF6;</span>
<span class="hljs-keyword">import</span> zipfile
<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> pandas <span class="hljs-keyword">as</span> pd
<span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt


<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">unzip</span><span class="hljs-params">(zip_filepath, dest_path)</span>:</span>
    <span class="hljs-string">&quot;&quot;&quot;
        &#x89E3;&#x538B;zip&#x6587;&#x4EF6;
    &quot;&quot;&quot;</span>
    <span class="hljs-keyword">with</span> zipfile.ZipFile(zip_filepath) <span class="hljs-keyword">as</span> zf:
        zf.extractall(path=dest_path)


<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">get_dataset_filename</span><span class="hljs-params">(zip_filepath)</span>:</span>
    <span class="hljs-string">&quot;&quot;&quot;
            &#x83B7;&#x53D6;&#x6570;&#x636E;&#x96C6;&#x6587;&#x4EF6;&#x540D;
    &quot;&quot;&quot;</span>
    <span class="hljs-keyword">with</span> zipfile.ZipFile(zip_filepath) <span class="hljs-keyword">as</span> zf:
        <span class="hljs-keyword">return</span> zf.namelist()[<span class="hljs-number">0</span>]


<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">run_main</span><span class="hljs-params">()</span>:</span>
    <span class="hljs-string">&quot;&quot;&quot;
        &#x4E3B;&#x51FD;&#x6570;
    &quot;&quot;&quot;</span>
    <span class="hljs-comment"># &#x58F0;&#x660E;&#x53D8;&#x91CF;</span>
    dataset_path = <span class="hljs-string">&apos;./data&apos;</span>  <span class="hljs-comment"># &#x6570;&#x636E;&#x96C6;&#x8DEF;&#x5F84;</span>
    zip_filename = <span class="hljs-string">&apos;open-food-facts.zip&apos;</span>  <span class="hljs-comment"># zip&#x6587;&#x4EF6;&#x540D;</span>
    zip_filepath = os.path.join(dataset_path, zip_filename)  <span class="hljs-comment"># zip&#x6587;&#x4EF6;&#x8DEF;&#x5F84;</span>
    dataset_filename = get_dataset_filename(zip_filepath)  <span class="hljs-comment"># &#x6570;&#x636E;&#x96C6;&#x6587;&#x4EF6;&#x540D;&#xFF08;&#x5728;zip&#x4E2D;&#xFF09;</span>
    dataset_filepath = os.path.join(dataset_path, dataset_filename)  <span class="hljs-comment"># &#x6570;&#x636E;&#x96C6;&#x6587;&#x4EF6;&#x8DEF;&#x5F84;</span>

    print(<span class="hljs-string">&apos;&#x89E3;&#x538B;zip...&apos;</span>, end=<span class="hljs-string">&apos;&apos;</span>)
    unzip(zip_filepath, dataset_path)
    print(<span class="hljs-string">&apos;&#x5B8C;&#x6210;.&apos;</span>)

    <span class="hljs-comment"># &#x8BFB;&#x53D6;&#x6570;&#x636E;</span>
    data = pd.read_csv(dataset_filepath, usecols=[<span class="hljs-string">&apos;countries_en&apos;</span>, <span class="hljs-string">&apos;additives_n&apos;</span>])

    <span class="hljs-comment"># &#x5206;&#x6790;&#x5404;&#x56FD;&#x5BB6;&#x98DF;&#x7269;&#x4E2D;&#x7684;&#x98DF;&#x54C1;&#x6DFB;&#x52A0;&#x5242;&#x79CD;&#x7C7B;&#x4E2A;&#x6570;</span>
    <span class="hljs-comment"># 1. &#x6570;&#x636E;&#x6E05;&#x7406;</span>
    <span class="hljs-comment"># &#x53BB;&#x9664;&#x7F3A;&#x5931;&#x6570;&#x636E;</span>
    data = data.dropna()    <span class="hljs-comment"># &#x6216;&#x8005;data.dropna(inplace=True)</span>

    <span class="hljs-comment"># &#x5C06;&#x56FD;&#x5BB6;&#x540D;&#x79F0;&#x8F6C;&#x6362;&#x4E3A;&#x5C0F;&#x5199;</span>
    <span class="hljs-comment"># &#x8BFE;&#x540E;&#x7EC3;&#x4E60;&#xFF1A;&#x7ECF;&#x8FC7;&#x89C2;&#x5BDF;&#x53D1;&#x73B0;&apos;countries_en&apos;&#x4E2D;&#x7684;&#x6570;&#x503C;&#x4E0D;&#x662F;&#x5355;&#x72EC;&#x7684;&#x56FD;&#x5BB6;&#x540D;&#x79F0;&#xFF0C;</span>
    <span class="hljs-comment"># &#x6709;&#x7684;&#x662F;&#x591A;&#x4E2A;&#x56FD;&#x5BB6;&#x540D;&#x79F0;&#x7528;&#x9017;&#x53F7;&#x9694;&#x5F00;&#xFF0C;&#x5982; Albania,Belgium,France,Germany,Italy,Netherlands,Spain</span>
    <span class="hljs-comment"># &#x6B63;&#x786E;&#x7684;&#x7EDF;&#x8BA1;&#x5E94;&#x8BE5;&#x662F;&#x5C06;&#x8FD9;&#x4E9B;&#x503C;&#x62C6;&#x5F00;&#x6210;&#x591A;&#x4E2A;&#x884C;&#x8BB0;&#x5F55;&#xFF0C;&#x7136;&#x540E;&#x8FDB;&#x884C;&#x5206;&#x7EC4;&#x7EDF;&#x8BA1;</span>
    data[<span class="hljs-string">&apos;countries_en&apos;</span>] = data[<span class="hljs-string">&apos;countries_en&apos;</span>].str.lower()

    <span class="hljs-comment"># 2. &#x6570;&#x636E;&#x5206;&#x7EC4;&#x7EDF;&#x8BA1;</span>
    country_additives = data[<span class="hljs-string">&apos;additives_n&apos;</span>].groupby(data[<span class="hljs-string">&apos;countries_en&apos;</span>]).mean()

    <span class="hljs-comment"># 3. &#x6309;&#x503C;&#x4ECE;&#x5927;&#x5230;&#x5C0F;&#x6392;&#x5E8F;</span>
    result = country_additives.sort_values(ascending=<span class="hljs-keyword">False</span>)

    <span class="hljs-comment"># 4. pandas&#x53EF;&#x89C6;&#x5316;top10</span>
    result.iloc[:<span class="hljs-number">10</span>].plot.bar()
    plt.show()

    <span class="hljs-comment"># 5. &#x4FDD;&#x5B58;&#x5904;&#x7406;&#x7ED3;&#x679C;</span>
    result.to_csv(<span class="hljs-string">&apos;./country_additives.csv&apos;</span>)

    <span class="hljs-comment"># &#x5220;&#x9664;&#x89E3;&#x538B;&#x6570;&#x636E;&#xFF0C;&#x6E05;&#x7406;&#x7A7A;&#x95F4;</span>
    <span class="hljs-keyword">if</span> os.path.exists(dataset_filepath):
        os.remove(dataset_filepath)

<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&apos;__main__&apos;</span>:
    run_main()
</code></pre>
<footer class="page-footer"><span class="copyright">Copyright &#xA9; BigCat all rights reserved&#xFF0C;powered by GitBook</span><span class="footer-modification">&#x300C;Revision Time:
2017-04-25 23:12:01&#x300D;
</span></footer>
                    
                    </section>
                
                
                </div>
            </div>
        </div>

        
        <a href="../../file/part03/3.9.html" class="navigation navigation-prev " aria-label="Previous page: 聚类模型 -- K-Means介绍"><i class="fa fa-angle-left"></i></a>
        
        
        <a href="../../file/part04/4.html" class="navigation navigation-next " aria-label="Next page: 四、数据可视化工具"><i class="fa fa-angle-right"></i></a>
        
    </div>
</div>

        
<script src="../../gitbook/app.js"></script>

    
    <script src="../../gitbook/plugins/gitbook-plugin-splitter/splitter.js"></script>
    

    
    <script src="../../gitbook/plugins/gitbook-plugin-toggle-chapters/toggle.js"></script>
    

    
    <script src="../../gitbook/plugins/gitbook-plugin-fontsettings/buttons.js"></script>
    

    
    <script src="../../gitbook/plugins/gitbook-plugin-livereload/plugin.js"></script>
    

<script>
// Load the "gitbook" module, then start it with the configuration object defined below.
require(["gitbook"], function(gitbook) {
    // Configuration literal handed unchanged to gitbook.start(); its keys include the
    // plugins whose assets this page loads (splitter, toggle-chapters, highlight,
    // fontsettings, livereload).
    var config = {"disqus":{"shortName":"gitbookuse"},"github":{"url":"https://github.com/dododream"},"search-pro":{"cutWordLib":"nodejieba","defineWord":["gitbook-use"]},"sharing":{"weibo":true,"facebook":true,"twitter":true,"google":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"tbfed-pagefooter":{"copyright":"Copyright © BigCat","modify_label":"「Revision Time:","modify_format":"YYYY-MM-DD HH:mm:ss」"},"baidu":{"token":"ff100361cdce95dd4c8fb96b4009f7bc"},"sitemap":{"hostname":"http://www.treenewbee.top"},"donate":{"wechat":"http://weixin.png","alipay":"http://alipay.png","title":"","button":"赏","alipayText":"支付宝打赏","wechatText":"微信打赏"},"edit-link":{"base":"https://github.com/dododream/edit","label":"Edit This Page"},"splitter":{},"toggle-chapters":{},"highlight":{},"fontsettings":{"theme":"white","family":"sans","size":2},"livereload":{}};
    gitbook.start(config);
});
</script>

        <!-- body:end -->
    </body>
    <!-- End of book Python数据分析课程讲义 -->
</html>
